

* Graph scheme used for every figure drawn below.
* NOTE(review): plotplainblind is not a built-in Stata scheme; presumably it
* comes from a user-written scheme package -- confirm it is installed.
set scheme plotplainblind


********************************************************************************
*New 2-dimensional capitalization figure
********************************************************************************
*------------------------------------------------------------------------------
* capitalization_figure (rclass, no arguments)
*
* Works on the data in memory. Steps:
*   1. Merges survey_sum.dta onto every row through a constant key n and
*      harmonises the names of the info_mean_ and info_se_ variables.
*   2. Optionally keeps only rows with every feature variable non-missing.
*   3. Overwrites each feature variable with its standardized (_st) or
*      min-max scaled (scaled_) counterpart, depending on the standardize flag.
*   4. Builds the summary indices ee_tot, ee_st and ee_nm.
*   5. Regresses pricediff on each feature (or on all of them jointly) with
*      reghdfe, absorbing the fixed effects in felist and clustering on mlsid.
*   6. Stores, per feature, the coefficient (bc_), its 95 percent bounds
*      (bu_, bl_) and the matching info_mean_ value (bi_).
*   7. Collapses to one row per feature, regresses bc_ on bi_, draws the
*      scatter/fit figure and exports it to PDF.
*
* Returns: r(slope_b), r(slope_se) = slope and s.e. from the bc_ on bi_ fit.
*
* Side effects: preserve is issued only after steps 1-6, so the merge, the
* dropped rows, the overwritten feature variables and the new bc_, bi_, bu_,
* bl_, lablab_, ee_ and n variables remain in memory after the call.
* Requires the variables referenced below (pricediff, mlsid, the _st or
* scaled_ versions of each feature, the fixed-effect variables) to exist.
*------------------------------------------------------------------------------
capture program drop capitalization_figure

program define capitalization_figure, rclass

	* ---- feature lists ------------------------------------------------------
	local less_observable_vars="ductravg duct_type negleakavg rsavg atticrvalue eeravg"
	local more_observable_vars="ah_location sizeavg progtherm fftype whftype negsysageavg "
	local other_less_observable_vars="ah_type notwinrec"
	local other_more_observable_vars="twosystems notraddr"

	* Features treated as continuous (these get the scaled_ prefix when
	* standardize is 0).
	local contsvars ductravg negleakavg atticrvalue eeravg ///
		sizeavg negsysageavg negwinrec negrrec

	* Baseline controls and absorbed fixed effects.
	local extrawords " yearbuiltaudit conditionedsqft"
	local felist="saleyear lsaleyear yearbuiltcat "

	* ---- switches -----------------------------------------------------------
	local standardize=1
	local includemiss=0
	local yb_interact=0
	local sqft_interact=0

	local alltogether=0

	local reg_adjustment=0
	local use_othervars=1

	*NEED TO USE ONLY NM IF CONTROLLING FOR EE_ST OR EE_TOT
	local only_nm_allvars=1

	local allow_nm_for_ee=0

	* ---- bring in the survey summary (matched to every row via n==1) --------
	gen n=1
	merge m:1 n using "C:\Users\awcassidy1\Dropbox\jmp_new/cleaned_data/survey_sum.dta"

	* Align the survey variable names with the feature names used here.
	* capture: the renames are skipped silently when the source is absent.
	cap rename info_mean_ductsystem1type info_mean_duct_type
	cap rename info_se_ductsystem1type info_se_duct_type

	cap rename info_mean_furnacefueltype info_mean_fftype
	* Fixed: target used to be info_se_duct_fftype, which broke the
	* info_se_ + feature-name pattern followed by the other renames.
	cap rename info_se_furnacefueltype info_se_fftype

	cap rename info_mean_waterheaterfueltype info_mean_whftype
	cap rename info_se_waterheaterfueltype info_se_whftype

	* ---- final list of features to analyse ----------------------------------
	if `use_othervars'==0 {
		local regvars= "`less_observable_vars' `more_observable_vars' "
	}
	if `use_othervars'==1 {
		local regvars ="`less_observable_vars' `more_observable_vars'  `other_less_observable_vars' `other_more_observable_vars'"
	}

	local words_tot=`: word count `regvars''
	di "words_tot=`words_tot'"

	* Complete-case restriction across all features.
	if `only_nm_allvars'==1 {
		foreach v in `regvars' {
			di "v=`v'"
			keep if !missing(`v')
		}
	}

	* ---- naming pieces that depend on the switches --------------------------
	if `standardize'==1 {
		local scaledword=""
		local standword="_st"
	}
	if `standardize'==0 {
		local scaledword="scaled_"
		local standword=""
	}

	if `reg_adjustment'==1 {
		local raword="_ra"
	}
	else {
		local raword=""
	}

	* Interaction settings do not depend on the feature, so set them once.
	* They stay empty when neither or both interaction switches are on.
	local estimate_extra=""
	local differential_word=""
	local by_word=""
	local shortbyword ""
	if `yb_interact'==1 & `sqft_interact'==0 {
		local estimate_extra="#c.yearbuilt_dm"
		local differential_word="Differential "
		* Fixed: by_word was never set in this branch, leaving the y-axis
		* title without its suffix (the sqft branch sets one).
		local by_word=" by Year Built"
		local shortbyword "byyb"
	}
	if `sqft_interact'==1 & `yb_interact'==0 {
		local estimate_extra="#c.conditionedsqft_dm"
		local differential_word="Differential "
		local by_word=" by Sqft"
		local shortbyword "bysqft"
	}

	eststo clear

	* ---- overwrite each feature with its standardized / scaled version ------
	foreach regvar in `regvars' {
		* Exact list membership; the former regexm() call was a substring
		* match and would misfire on any name contained in another.
		local iscont : list regvar in contsvars
		if `iscont' {
			local srcvar "`scaledword'`regvar'`standword'"
		}
		else {
			local srcvar "`regvar'`standword'"
		}
		replace `regvar'=`srcvar' if !missing(`regvar')
	}

	* ---- summary indices over all features ----------------------------------
	egen ee_tot=rowtotal(`regvars')
	egen ee_st=rowmean(`regvars')

	local words_tot=`: word count `regvars''
	di "words_tot=`words_tot'"
	egen ee_nm=rownonmiss(`regvars')

	*scale ee_tot by number of variables
	replace ee_tot=ee_tot*(`words_tot'/ee_nm)

	* Unless partial information is allowed, blank the indices for rows
	* that are missing any feature.
	if `allow_nm_for_ee'!=1 {
		replace ee_st=. if ee_nm!=`words_tot'
		replace ee_tot=. if ee_nm!=`words_tot'
	}

	sum ee_tot if !missing(pricediff)

	* ---- regressor list for the joint specification -------------------------
	local alltogether_regressorlist ""
	local extra_for_adjust=""
	foreach regvar in `regvars' {

		* Descriptives for the log only.
		sum `regvar'

		* With includemiss on, the controls are replaced by the feature's
		* missing-indicator terms; otherwise extrawords is left untouched.
		if `includemiss'==1 & `reg_adjustment'==1 {
			local extrawords="post_mi_`regvar' c.post_mi_`regvar'#c.conditionedsqft_dm c.post_mi_`regvar'#c.yearbuilt_dm"
		}
		if `includemiss'==1 & `reg_adjustment'==0 {
			local extrawords="post_mi_`regvar'"
		}
		if `reg_adjustment'==1 {
			local extra_for_adjust="c.`regvar'#c.conditionedsqft_dm  c.`regvar'#c.yearbuilt_dm c.conditionedsqft_dm c.yearbuilt_dm"
		}
		local alltogether_regressorlist "`alltogether_regressorlist' c.`regvar' `extrawords' `extra_for_adjust'"
	}

	if `alltogether'==1 {
		eststo: xi: reghdfe pricediff `alltogether_regressorlist' `extrawords' ///
			, absorb(`felist') vce(cl mlsid)
	}

	* ---- per-feature estimates ----------------------------------------------
	foreach regvar in `regvars' {
		if `alltogether'==0 {
			if `includemiss'==1 & `reg_adjustment'==1 {
				local extrawords="post_mi_`regvar' c.post_mi_`regvar'#c.conditionedsqft_dm c.post_mi_`regvar'#c.yearbuilt_dm"
			}
			if `includemiss'==1 & `reg_adjustment'==0 {
				local extrawords="post_mi_`regvar'"
			}
			if `reg_adjustment'==1 {
				local extra_for_adjust="c.`regvar'#c.conditionedsqft_dm  c.`regvar'#c.yearbuilt_dm c.conditionedsqft_dm c.yearbuilt_dm"
			}

			eststo: xi: reghdfe pricediff `regvar'  ///
				`extrawords' ///
				`extra_for_adjust' ///
				, absorb(`felist') vce(cl mlsid)
		}

		* Pull the coefficient of interest from the most recent estimates
		* (the joint model when alltogether is 1).
		local v="`regvar'"
		local se=_se[c.`v'`estimate_extra']
		di "`se'"
		local b=_b[c.`v'`estimate_extra']

		*bc stores the coef value and bi stores the information value of that variable.
		* Strip any transformation prefixes/suffixes to get the base name.
		local vnonst = subinstr("`v'","post_","",.)
		local vnonst = subinstr("`vnonst'","_st","",.)
		local vnonst = subinstr("`vnonst'","above_med_","",.)
		local vnonst = subinstr("`vnonst'","mi_","",.)
		local vnonst = subinstr("`vnonst'","scaled_","",.)

		gen bc_`vnonst'=`b'

		gen bi_`vnonst'= info_mean_`vnonst'
		gen lablab_`vnonst'= "`vnonst'"

		*upper and lower ci
		gen bu_`vnonst'= bc_`vnonst'  + 1.96 * `se'
		gen bl_`vnonst'= bc_`vnonst' - 1.96 * `se'
	}

	* ---- one row per feature ------------------------------------------------
	preserve
	* The stored values are constants, so the first row carries everything.
	keep if _n==1
	*keep if _n<=2
	gen id=_n
	keep bi_* bc_* bu_* bl_* id lablab_*
	reshape long bi_ bc_ bu_ bl_ lablab_, i(id) j(audvar) string
	sort bi_*

	* Flag the continuous features.
	gen conts_audvar = 0
	foreach v in `contsvars' {
		local vnonst = subinstr("`v'","_st","",.)
		local vnonst = subinstr("`vnonst'","above_med_","",.)
		local vnonst = subinstr("`vnonst'","post_","",.)
		local vnonst = subinstr("`vnonst'","mi_","",.)
		local vnonst = subinstr("`vnonst'","scaled_","",.)

		replace conts_audvar=1 if audvar=="`vnonst'"
	}

	* Slope of capitalization on the information measure.
	reg bc_ bi_

	return scalar slope_b = _b[bi_]
	return scalar slope_se = _se[bi_]

	* Fixed: the t-statistic is now computed from the raw estimates rather
	* than from the rounded, display-formatted coefficient string.
	local b=_b[bi_]
	local overall_b: di %9.3fc `b'
	local b_t1=_b[bi_]/_se[bi_]
	local overall_t: di %9.3fc `b_t1'
	di "slope=`overall_b' t=`overall_t'"

	* ---- display labels -----------------------------------------------------
	replace lablab_="-% Leakage" if lablab_=="negleakavg"
	replace lablab_="Duct R-val" if lablab_=="ductravg"
	replace lablab_="Size" if lablab_=="sizeavg"
	replace lablab_="Prog Therm" if lablab_=="progtherm"
	replace lablab_="EER" if lablab_=="eeravg"
	replace lablab_="Attic R-val" if lablab_=="atticrvalue"
	replace lablab_="Metal Ducts" if lablab_=="duct_type"
	replace lablab_="Return Sizing" if lablab_=="rsavg"
	replace lablab_="AH in Closet" if lablab_=="ah_location"
	replace lablab_="Gas WH" if lablab_=="whftype"
	replace lablab_="Gas Furnace" if lablab_=="fftype"
	replace lablab_="- Sys Age" if lablab_=="negsysageavg"
	replace lablab_="Vertical AH" if lablab_=="ah_type"
	replace lablab_="Did not Rec WS" if lablab_=="notwinrec"
	replace lablab_="Did not Rec Ins" if lablab_=="notraddr"
	replace lablab_="2 Systems" if lablab_=="twosystems"

	* Clock positions of the marker labels (default 3 o'clock).
	* Fixed: the strings compared here must equal the labels assigned just
	* above; three of them did not, so those overrides never applied.
	* Prog Therm keeps position 12, the value that previously took effect.
	gen labpos=3
	replace labpos=9 if lablab_=="Gas Furnace"
	replace labpos=9 if lablab_=="- Sys Age"
	replace labpos=12 if lablab_=="Return Sizing"
	replace labpos=5 if lablab_=="Metal Ducts"
	replace labpos=6 if lablab_=="AH in Closet"
	replace labpos=12 if lablab_=="-% Leakage"
	replace labpos=12 if lablab_=="Prog Therm"

	* ---- figure -------------------------------------------------------------
	twoway (rcap bu_ bl_ bi_ , lcolor(green%30)) ///
		(scatter bc_ bi_ , mcolor(green%30) mlabel(lablab_) mlabcolor(black) mlabvposition(labpos)) ///
		(lfit bc_ bi_, lcolor(pink)), ///
		yline(0, lcolor(purple)) xtitle(% Observing) ///
		ytitle("`differential_word' Capitalization of Feature `by_word'")  legend(off)

	graph export "C:\Users\awcassidy1\Dropbox\jmp_new/figs/cap_by_feature`raword'_miss`includemiss'_stand`standardize'_`shortbyword'.pdf", as(pdf) replace

	restore
end



*------------------------------------------------------------------------------
* Driver: load the analysis file, build the derived variables that
* capitalization_figure expects, label them, and draw the figure.
*------------------------------------------------------------------------------
use "C:\Users\awcassidy1\Dropbox\jmp_new\cleaned_data/data_with_observability_indices.dta", clear
* capture: do not abort if the file carries no leftover _merge; the program
* below runs its own merge and needs the name to be free.
capture drop _merge

keep if saleyear<=2015

* Re-standardize rsavg on the estimation sample (rows with pricediff).
sum rsavg if !missing(pricediff)
replace rsavg_st=(rsavg-r(mean))/r(sd)

* Sign-flipped and renamed copies; capture skips any that already exist
* or whose source variable is absent.
cap gen negwinrec=-winrec

cap gen negrrec=-rrec

cap gen ah_location=system1locationairhandler
cap gen ah_location_st=system1locationairhandler_st
cap gen ah_type=system1airhandlertype
cap gen ah_type_st=system1airhandlertype_st
cap gen wh_tanktype=waterheatertanktype
cap gen wh_tanktype_st=waterheatertanktype_st

* Demeaned controls (means taken over the estimation sample).
sum yearbuiltaudit if !missing(pricediff)
cap gen yearbuilt_dm=yearbuiltaudit-r(mean)

sum conditionedsqft if !missing(pricediff)
cap gen conditionedsqft_dm=conditionedsqft-r(mean)

* Min-max scaled versions of the continuous features.
local contsvars ductravg negleakavg atticrvalue eeravg ///
	sizeavg negsysageavg negwinrec negrrec
foreach v in `contsvars' {
	sum `v' if !missing(pricediff)
	cap gen scaled_`v'=(`v'-r(min))/(r(max)-r(min))

}

* Standardized indicators: create if absent, then always recompute so the
* values reflect the current sample.
sum notwinrec if !missing(pricediff)
cap gen notwinrec_st=(notwinrec-r(mean))/r(sd)
replace notwinrec_st=(notwinrec-r(mean))/r(sd)

sum twosystems  if !missing(pricediff)
cap gen twosystems_st=(twosystems-r(mean))/r(sd)
replace twosystems_st=(twosystems-r(mean))/r(sd)

cap gen notraddr=(rrec==0) if !missing(rrec) 
sum notraddr if !missing(pricediff)
cap gen notraddr_st=(notraddr-r(mean))/r(sd)
* Fixed: recompute like the two indicators above; previously an existing
* notraddr_st was silently kept as it was.
replace notraddr_st=(notraddr-r(mean))/r(sd)

* Variable labels for the raw features ...
la var negleakavg "-% Leakage" 
la var ductravg "Duct R-val"
la var atticrvalue "Attic R-val"
la var whftype "Gas WH"
la var fftype "Gas Furnace"
la var twosystems "2 Systems"
la var progtherm "Prog Therm"
la var ah_location "AH in Closet"

* ... and for their standardized counterparts.
la var negleakavg_st "-% Leakage" 
la var ductravg_st "Duct R-val"
la var atticrvalue_st "Attic R-val"
la var whftype_st "Gas WH"
la var fftype_st "Gas Furnace"
la var twosystems_st "2 Systems"
la var progtherm_st "Prog Therm"
la var ah_location_st "AH in Closet"



capitalization_figure
